####January 27, 2020

### Two data files are associated with this script:
#women_highcourts_jan_27_2021.csv is the independent and matching variable data
#DV_data.csv is the dependent variable data. 
#The DV data includes (1) data collected by a team of research assistants managed by Leeann Bass from newspapers, court websites, etc as well as 
# (2) data provided by country experts through the Varieties of Democracy expert Survey.
#From the expert survey, if year of first woman is listed as 2017, we interpret that to mean one of the following: there has been no woman justice yet (survey fielded in 2017) or the first woman was selected in 2017 

#Table 1, descriptive stats: Lines 46-65
#Figure 4, Balance: Lines 460-551
#Figure 5, Permutation Plot, Increase in Actors, Left panel: Lines 92-332
#Figure 5, Right Panel, moderate pressure: Lines 335-457
#Figure 6, Permutation Plot, Decrease in Actors, Left Panel: Lines 555-765
#Figure 6, Moderate pressure, Right Panel: Lines 771-889
#Figure 7, Permutation Plot, Increase or Decrease in Actors: Lines 988-1222
##Analysis, Any change in process: Lines 1318-1459
#Figure A1, Balance, Decrease: Lines 892-984
#Figure A2, Balance, Increase or Decrease: Lines 1225-1314
##Figure A3, Balance, Any change : Lines 1462-1547
##Table A4, Descriptive Stats in sample v out of sample, Lines
##Footnote 19: Lines 1551-1628
##Figure A4, Treatment as an increase in nominators: Lines 1631-1836
##Figure A5, Treatment as an increase in the number of selectors: Lines 1839-2034
##Table A4, Comparing in sample to out of sample descriptives: Lines 2036-2112

##Comparisons/treatments
#Analysis 1: Treatment is defined as an increase in actors
#Sample is exact matched on pre-treatment number of nominators and number of selectors and year and then PSM matched on several other variables.
#Analysis 2: Treatment is defined as a decrease in actors
#sample is exact matched on pre treatment number of nominators and number of selectors and year then PSM matched on several other variables.
#Analysis 3: Treatment is defined as either an increase or a decrease in actors
#sample is exact matched on year and type of event (new constitution, amendment, or interim constitution) then PSM matched on several other variables.
#Analysis 4: Treatment is defined as any change to the selection process for justices
#sample is exact matched on year and PSM matched on several other variables

library(foreign)
highcourts<- read.csv("women_highcourts_jan_27_2021.csv")
highcourts_full<-highcourts
##dropping cases with NAs for the variable number of nominators filled. 
##Cases with missing values there are only used for descriptive comparisons of in-sample data to out of sample or OECD sample data for table A4.
highcourts<-highcourts[which(highcourts$number_nominators_filled>=0),]

####Summary Statistics: Table 2####w
#install.packages("stargazer")
library(stargazer)
highcourts_descriptive<-highcourts[c("year",
                                     "number_nominators_filled",
                                     "number_selectors_filled",
                                     "percent_women_filled",
                                     "since_UNIVSFFRG",   
                                     "v2jupack_filled", 
                                     "v2cldiscw_filled",
                                     "v2clprptyw_filled",
                                     "v2x_partipdem_filled",
                                     "v2pepwrgen_filled",
                                     "v2clacjstw_filled",
                                     "v2clslavef_filled",
                                     "v2cldmovew_filled",
                                     "v2mefemjrn_filled",
                                     "v2csgender_filled",
                                     "v2x_egaldem_filled",
                                     "v2xcl_rol_filled"
)]
stargazer(highcourts_descriptive, omit.summary.stat=c("n", "p25","p75"))


#v2jupack_filled: court packing index
#v2x_partipdem_filled: participatory democracy index
#v2x_egaldem_filled:egalitarian democracy index
#v2xcl_rol_filled: equality before the law and individual liberty
#v2x_gender_filled: women's political empowerment index'
#v2csgender_filled: Women's participation, Civil Soc Orgs
#v2mefemjrn_filled: Percent Female journalists
#v2cldmovew_filled: freedom of domestic movement for women
#v2cldiscw_filled: freedom of discussion for women 
#v2clslavef_filled: freedom from forced labor for women
# v2clprptyw_filled: Property Rights for women
#v2clacjstw_filled: access to justice for women
#v2pepwrgen_filled: Power distributed by gender


###Reading in Dv data. Will drop observations that have already had a woman since we are testing for time until FIRST woman

DV_data<-read.csv("DV_data.csv")

#subset to the variables needed for matching
DV_data_small<-DV_data[c("state", "year","has_there_ever_been_wom")]



####ANALYSIS 1 (Increase in Actors):####
#Treatment as an Increase in the number of nominators/selectors
#### Two stage matching, exact matched on pre-change institutions and year

#subset of variables needed for matching
#Treated1: Treatment as increase in the number of actors
highcourts1<-highcourts[c("state", "year", "record_id", "treated1", 
                          "since_UNIVSFFRG", "v2x_partipdem_filled", "v2x_egaldem_filled", "v2xcl_rol_filled",	"v2x_gender_filled",	
                          "v2jupack_filled","v2csgender_filled" ,	"v2mefemjrn_filled",	"v2cldmovew_filled",	"v2cldiscw_filled",	"v2clslavef_filled",
                          "v2clprptyw_filled",	"v2clacjstw_filled",	"v2pepwrgen_filled" , "drop_from_match1", "percent_women_filled", "nominators_to_match1","selectors_to_match1")]


#merge
highcourts1<-merge(highcourts1, DV_data_small, by=c("state", "year"), all.x=TRUE)


#Dropping observations where we know there has been a woman already.
#So keeping observations where we know there hasn't been a woman or data is missing
#Missingness for has_there_ever_been_wom more likely to stem from missing confirmation that there has NOT been a woman than missing the presence of a woman, so treating NAs as no woman yet.

highcourts1<-subset(highcourts1, has_there_ever_been_wom==0 | is.na(has_there_ever_been_wom)) 
# treated=2 for units that have been treated twice. We care about the first treatment. 
treated1_countries<-(highcourts1[which(highcourts1$treated==1),])
##need to drop observations with state id of 80 and 81 -- German Democratic Republic and German Federal Republic. 
#Both had women by the 50s but merged in as Germnay (state==82)
highcourts1<-subset(highcourts1, state!=80 & state!=81)



#dropping countries that shouldn't be matches:
#In particular, we do not want any treated units to be control units for other countries, either before or after their treatment (marked as drop_from_match=1)
highcourts1<-highcourts1[which(highcourts1$drop_from_match1!=1),] #4069 to 3076


#Matching in two stages: first, exact matching on year and pre-treatment institutions. 
#Then, we will use full match in the optmatch package to require matching within strata.


#install.packages("cem")
#install.packages("MatchIt")
library(cem)
library("MatchIt")

#Subsetting to just the variables we need to exact match b/c cem doesn't like NAs
highcourts1_cem_subset<-highcourts1[, c("record_id", "nominators_to_match1", "selectors_to_match1", "year", "treated1", "drop_from_match1", "state")]


#generating bin cutpoints
yearcut<-c(1969.1, 1970.1, 1971.1, 1972.1, 1973.1, 1974.1, 1975.1, 1976.1, 1977.1, 1978.1, 1979.1, 1980.1, 1981.1, 1982.1, 1983.1, 1984.1, 1985.1, 1986.1, 1987.1, 1988.1, 1989.1, 1990.1, 1991.1, 1992.1, 1993.1, 1994.1, 1995.1, 1996.1, 1997.1, 1998.1, 1999.1, 2000.1, 2001.1, 2002.1, 2003.1, 2004.1, 2005.1, 2006.1, 2007.1, 2008.1, 2009.1, 2010.1)
nominatorscut<- c(-.1, .9, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1, 9.1) 
selectorscut<- c(-.1, .9, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1)

#coarsened exact matching
cem.1 <- matchit(treated1 ~ year + nominators_to_match1 + selectors_to_match1, 
                 data = highcourts1_cem_subset, method = "cem", 
                 cutpoints = list(year=yearcut, nominators_to_match1=nominatorscut, selectors_to_match1=selectorscut))


#How many treated units matched?
cem.1$nn #27 of 30 matched units

#cem.1 includes is only the years and countries that matched. 
#saving as a new object
cem.data2017_tr1 <- match.data(cem.1)


#merging other matching variables for the second stage matching with the CEM output
cem.data2017_tr1<-merge(cem.data2017_tr1, highcourts1, by= c("record_id", "nominators_to_match1","selectors_to_match1", "year", "treated1", "drop_from_match1", "state"), all.x=TRUE) # n=834


#install.packages("optmatch")
#install.packages("RItools")
library(optmatch)
library(RItools)

#propensity score matching with optmatch
psm<-glm(treated1~ percent_women_filled 
         +since_UNIVSFFRG   
         +v2jupack_filled
         +v2x_partipdem_filled
         +v2x_egaldem_filled
         + v2xcl_rol_filled
         +v2x_gender_filled
         +v2csgender_filled
         +v2mefemjrn_filled
         +v2cldmovew_filled
         +v2cldiscw_filled
         +v2clslavef_filled
         +v2clprptyw_filled
         +v2clacjstw_filled
         +v2pepwrgen_filled
         +strata(subclass), family="binomial", data=cem.data2017_tr1)

ps.pm <- pairmatch(psm, data = cem.data2017_tr1)
summary(ps.pm)
#



#pulling matches indicator from optmatch into the dataframe
all.equal(names(ps.pm), row.names(cem.data2017_tr1)) #TRUE
matched_pairs_tr1<-cbind(cem.data2017_tr1, matches=ps.pm)

#Keeping version with all data for generating balance plot later
matched_pairs_tr1_full<-matched_pairs_tr1


#Now have the matched data set 

##using qual CI to generate permutation distributions ##
#install.packages("qualCI")
library("qualCI")
#citation("qualCI")

#Adding back in the DV
matched_pairs_tr1<-merge(x=matched_pairs_tr1, y=DV_data, by=c("state", "year"), all.x=TRUE, all.y=FALSE)


##NAs messing things up. Making the matching indicator numeric so I can replace NAs with 0 (so 0= not matched)
matched_pairs_tr1$matched <-as.numeric(matched_pairs_tr1$matches)
matched_pairs_tr1$matched[is.na(matched_pairs_tr1$matched)]<-0

#Want just the matched pairs
just_matched_tr1_full<-matched_pairs_tr1[which(matched_pairs_tr1$matched!= 0),] ##use this object for the comparison of treatment values at the end of the file
just_matched_tr1<-just_matched_tr1_full[, c("record_id", "year","treated1", "matched", "year_first_wom")]

#For countries who have not yet had a woman, replacing year of first woman as 2017 (current year at time of analysis) in order to calculate the time from treatment until the first woman.
just_matched_tr1$year_first_wom[which(just_matched_tr1$year_first_wom==9999)]<-2017.0

#reshape wide
wide_matched_tr1 <- reshape(just_matched_tr1, 
                            timevar = "treated1",
                            idvar = c("matched", "year"),
                            direction = "wide")


#generating years to first woman since time of treatment variable 
wide_matched_tr1$time_to_wom.1 <- wide_matched_tr1$year_first_wom.1 - wide_matched_tr1$year
wide_matched_tr1$time_to_wom.0<-wide_matched_tr1$year_first_wom.0-wide_matched_tr1$year
wide_matched_tr1$diff_treated_minus_control<-wide_matched_tr1$time_to_wom.1-wide_matched_tr1$time_to_wom.0


#reshape to long
long_matched_tr1<-reshape(wide_matched_tr1,
                          varying=c("record_id.1", 
                                    "record_id.0", 
                                    "year_first_wom.1",
                                    "year_first_wom.0",
                                    "time_to_wom.1", 
                                    "time_to_wom.0"),
                          idvar = c("matched", "year"),
                          direction="long")

#install.packages("dplyr")
library("dplyr")
long_matched_tr1<-rename(long_matched_tr1, treated1=time)

### Drop out the NAs
long_matched_tr1<-na.omit(long_matched_tr1)#54 to 30
long_matched_tr1_original<-long_matched_tr1 #for later analysis (combing sampes from tr1 and tr2)
### Create a new column called "winner" (which country in the pair had a woman selected first)
long_matched_tr1$winner<-"NA"
for (i in 1:nrow(long_matched_tr1))
{
  for (j in 1:nrow(long_matched_tr1))
  {
    if(j!=i)
    {
      if(long_matched_tr1$matched[i]==long_matched_tr1$matched[j])
      {
        if(long_matched_tr1$year_first_wom[i]<long_matched_tr1$year_first_wom[j])
        {
          long_matched_tr1$winner[i]<-1
        }
        else if(long_matched_tr1$year_first_wom[i]>long_matched_tr1$year_first_wom[j])
        {
          long_matched_tr1$winner[i]<-0
        }
        else if(long_matched_tr1$year_first_wom[i]==long_matched_tr1$year_first_wom[j])
        {
          long_matched_tr1$winner[i]<-"NA"
        }
        
      }
    }
  }
}

### Create a value called absolute value that is the abs value of the difference in time from treatment until first woman
long_matched_tr1$abs_val<-abs(long_matched_tr1$diff_treated_minus_control)

###Generate a column called between set rank, which will hold the ranks of the pairs in terms of size of the difference between treatment and control within a pair
###Spilit into "winner" and "loser" column
winner_tr1<-subset(long_matched_tr1,long_matched_tr1$winner==1)
loser_tr1<-subset(long_matched_tr1,long_matched_tr1$winner==0)
###Sort the subsets by "time_to_wom"
winner_tr1<-winner_tr1[rev(order(winner_tr1$time_to_wom)),]
loser_tr1<-loser_tr1[rev(order(loser_tr1$time_to_wom)),]
###Create the column called between set rank
winner_tr1$between_set_rank<-rank(winner_tr1$abs_val,ties.method="first")
loser_tr1$between_set_rank<-rank(loser_tr1$abs_val,ties.method="first")

###Merge the subsets again
all_tr1<-rbind(winner_tr1,loser_tr1)

###Create a column called pair and it is identical to between_set_rank
all_tr1$pair<-all_tr1$between_set_rank

###Sort the final dataset by pair
all_tr1_sorted<-all_tr1[order(all_tr1$pair),]
##need "winner" variable to be numeric
all_tr1_sorted$winner<-as.numeric(all_tr1_sorted$winner)


all_tr1_sorted<-all_tr1_sorted[, c("record_id", "treated1", "winner", "between_set_rank","pair")]

table(all_tr1_sorted$pair)# 14 pairs

between_rank_vector1<-c(1:14)


long_rank_quade_tr1<-quade.prep(data=all_tr1_sorted, 
                                set="pair",
                                treatment="treated1",
                                withinRank = "winner",
                                unit="record_id",
                                betweenRank = between_rank_vector1)

qout<-quade(long_rank_quade_tr1)
qout

####Plot: permuatation distribution####
#pdf("TR_Increase_Exact_14pair_Jan2021.pdf", height=5.15, width=5 ) 
par(mfrow=c(1,1), mar=c(5, 5, 5, 5), family="serif")
hist(qout$Q, main="Permuatation Distribution, Increase in Actors", xlab="Signed Rank Statistic", col=NULL)
segments(qout$Qobs, 0, qout$Qobs, y1=7000000, lty = 2, lwd = 2, col = "red")
text(85,750, "p=.097")
#text(15, 6000, "Full Sample")
#text(15, 5600, "14 Pairs")
mtext("Full Sample")
#dev.off()


####Moderate Pressure####

##We theorize that the effects will be greatest at moderate social pressure for diversity.
#pulling in the variable that is the percent_women in legis office (filled/permuted)

percent_wom<-highcourts[, c("record_id", "percent_women_filled")]
long_matched_tr1_moderate<-merge(long_matched_tr1, percent_wom, by="record_id", all.x=TRUE, all.y=FALSE)

#Defining moderate pressure as anything above the median at the start of our sample in 1970
summary(highcourts$percent_women_filled[which(highcourts$year==1970)],)
#median=2.8%

#reshape

wide_matched_tr1_moderate <- reshape(long_matched_tr1_moderate, 
                                     timevar = "treated1",
                                     idvar = c("matched", "year", "diff_treated_minus_control", "abs_val"),
                                     direction = "wide")

#identifying pairs where both countries have "moderate" pressure to diversify
wide_matched_tr1_moderate$moderate[which(wide_matched_tr1_moderate$percent_women_filled.0>2.8 & wide_matched_tr1_moderate$percent_women_filled.1>2.8)]<-1
table(wide_matched_tr1_moderate$moderate, wide_matched_tr1_moderate$matched)
#pairs for 1970 median 2,7,8,9,10,12,13,16, 19


wide_matched_tr1_moderate<-wide_matched_tr1_moderate[which(wide_matched_tr1_moderate$moderate==1),]
wide_matched_tr1_moderate$moderate<-NULL 
wide_matched_tr1_moderate$percent_women_filled.1<-NULL
wide_matched_tr1_moderate$percent_women_filled.0<-NULL


#reshape long
long_matched_tr1_moderate<- reshape(wide_matched_tr1_moderate,
                                    varying=c("record_id.0" ,
                                              "record_id.1",
                                              "time_to_wom.0",
                                              "time_to_wom.1" ,
                                              "winner.0",
                                              "winner.1",
                                              "year_first_wom.0",
                                              "year_first_wom.1"),
                                    idvar = c("abs_val","diff_treated_minus_control","matched","year" ),
                                    direction="long")

long_matched_tr1_moderate<-rename(long_matched_tr1_moderate, treated1=time)


##need to re-rank
long_matched_tr1_moderate$winner<-"NA"
for (i in 1:nrow(long_matched_tr1_moderate))
{
  for (j in 1:nrow(long_matched_tr1_moderate))
  {
    if(j!=i)
    {
      if(long_matched_tr1_moderate$matched[i]==long_matched_tr1_moderate$matched[j])
      {
        if(long_matched_tr1_moderate$year_first_wom[i]<long_matched_tr1_moderate$year_first_wom[j])
        {
          long_matched_tr1_moderate$winner[i]<-1
        }
        else if(long_matched_tr1_moderate$year_first_wom[i]>long_matched_tr1_moderate$year_first_wom[j])
        {
          long_matched_tr1_moderate$winner[i]<-0
        }
        else if(long_matched_tr1_moderate$year_first_wom[i]==long_matched_tr1_moderate$year_first_wom[j])
        {
          long_matched_tr1_moderate$winner[i]<-"NA"
        }
        
      }
    }
  }
}

### Create a value called absolute value
long_matched_tr1_moderate$abs_val<-abs(long_matched_tr1_moderate$diff_treated_minus_control)

###Generate a column called between set rank
###Spilit into winner loser column
winner_tr1_moderate<-subset(long_matched_tr1_moderate,long_matched_tr1_moderate$winner==1)
loser_tr1_moderate<-subset(long_matched_tr1_moderate,long_matched_tr1_moderate$winner==0)
###Sort the subsets by "time_to_wom"
winner_tr1_moderate<-winner_tr1_moderate[rev(order(winner_tr1_moderate$time_to_wom)),]
loser_tr1_moderate<-loser_tr1_moderate[rev(order(loser_tr1_moderate$time_to_wom)),]
###Create the column called between set rank
winner_tr1_moderate$between_set_rank<-rank(winner_tr1_moderate$abs_val,ties.method="first")
loser_tr1_moderate$between_set_rank<-rank(loser_tr1_moderate$abs_val,ties.method="first")

###Merge the subsets again
all_tr1_moderate<-rbind(winner_tr1_moderate,loser_tr1_moderate)

###Create a column called pair and it is identical to between_set_rank
all_tr1_moderate$pair<-all_tr1_moderate$between_set_rank

###Sort the final dataset by pair
all_tr1_sorted_moderate<-all_tr1_moderate[order(all_tr1_moderate$pair),]
##need "winner" variable to be numeric
all_tr1_sorted_moderate$winner<-as.numeric(all_tr1_sorted_moderate$winner)

####QualCI
all_tr1_sorted_moderate<-all_tr1_sorted_moderate[, c("record_id", "treated1", "winner", "between_set_rank","pair")]
table(all_tr1_sorted_moderate$pair)
between_rank_vector1_m<-c(1:8)

long_rank_quade_tr1_moderate<-quade.prep(data=all_tr1_sorted_moderate, 
                                         set="pair",
                                         treatment="treated1",
                                         withinRank = "winner",
                                         unit="record_id",
                                         betweenRank = between_rank_vector1_m)

qout<-quade(long_rank_quade_tr1_moderate)
qout
####Plot: Permutation Distribution####
#permutation distribution plot
#pdf("Tr_inc_exact_matched_moderate_pres_jan2020.pdf", height=5.15, width=5 ) 
par(mfrow=c(1,1), mar=c(5, 5, 5, 5), family="serif")
hist(qout$Q, main="Permutation Distribution, Increase in Actors", xlab="Signed Rank Statistic", col=NULL)
segments(qout$Qobs, 0, qout$Qobs, y1=70, lty = 2, lwd = 2, col = "red")
text(32,35, "p=.156")
mtext("Moderate Pressure Sample")
#dev.off()


####Balance####
#make matched pairs numeric
matched_pairs_tr1_full$matched<-as.numeric(matched_pairs_tr1_full$matches)
matched_pairs_tr1_full$matched[is.na(matched_pairs_tr1_full$matched)] <- 0


#need a vector of the variables that we need
var_names<-   c("year",
                "nominators_to_match1",
                "selectors_to_match1", 
                "percent_women_filled",
                "since_UNIVSFFRG",
                "v2jupack_filled",
                "v2cldiscw_filled",
                "v2clprptyw_filled",
                "v2csgender_filled",
                "v2pepwrgen_filled",
                "v2x_gender_filled",
                "v2xcl_rol_filled",
                "v2clacjstw_filled",
                "v2cldmovew_filled",
                "v2clslavef_filled",
                "v2mefemjrn_filled",
                "v2x_egaldem_filled",
                "v2x_partipdem_filled")
matched_sdm<-NULL
all_sdm<-NULL
balance<-as.data.frame(cbind(var_names, matched_sdm, all_sdm))


for (i in 1:length(var_names)){
  balance$matched_sdm[i]<- (((mean(na.omit(matched_pairs_tr1_full[matched_pairs_tr1_full$treated1==1, var_names[i]]))) 
                             -
                               (mean(na.omit(matched_pairs_tr1_full[matched_pairs_tr1_full$treated1==0 & matched_pairs_tr1_full$matched!=0, var_names[i]]))))
                            /
                              (sqrt(var(na.omit(matched_pairs_tr1_full[matched_pairs_tr1_full$matched!=0, var_names[i]])))))
}

for (i in 1:length(var_names)){
  balance$all_sdm[i]<- (((mean(na.omit(highcourts[highcourts$treated1==1, var_names[i]]))) 
                         -
                           (mean(na.omit(highcourts[highcourts$treated1==0, var_names[i]]))))
                        /
                          (sqrt(var(na.omit(highcourts[, var_names[i]])))))
}


balance_variables<-c("Year", 
                     "Number of Nominators", 
                     "Number of Selectors",
                     "Percent Women in Parliament", 
                     "Years since Univ. Suffrage", 
                     "Court Packing Index", 
                     "Freedom of Discussion for Women", 
                     "Property Rights for Women", 
                     "Women's Civil Soc. Participation", 
                     "Power Distributed by Gender",
                     "Women Political Empowerment", 
                     "Equality Before the Law", 
                     "Access to Justice for Women", 
                     "Freedom Domestic Mvmt for Women", 
                     "Freedom from Forced Labor for Women", 
                     "Female Journalists", 
                     "Egalitarian Democracy Index",
                     "Participatory  Democracy Index" 
)



#PLOT #
#pdf("Balance_Plot_Tretment_as_Increase_jan2021.pdf", height=5, width=10 ) 
par(mfrow=c(1,1), mar=c(5, 18, 5, 5), family="serif")
y.axis <- c(length(balance$matched_sdm):1)
plot(balance$matched_sdm, y.axis,
     type = "p",
     axes = FALSE,
     xlab = "",
     ylab = "",
     pch = 20,
     cex = 1.2,
     xlim = c(-1,1),
     main = "Standardized Difference in Means",
     sub="Difference between Treated and Untreated Means"
)

segments(0, 0, x1=0, y1=19, lty = 2, lwd = 2, col = "grey")
axis(2, at = y.axis, label = balance_variables, las = 1, tick = T, mgp = c(2,.6,0), cex.axis = 1)
axis(1, mgp = c(0,.6,.75))
points(balance$all_sdm, y.axis, pch=1, cex=1)
legend(x= .2, y=18, c("Matched Data", "All Data"), pch=c(20, 1), cex=.8,  pt.cex=.8, bty = "n")
mtext("Balance, Treatment as Increase")
#dev.off()



####ANALYSIS 2 (Decrease in Actors):####
#Treatment as a Decrease in the number of nominators/selectors
#### Two stage matching, exact matched on pre-change institutions and year

#subset of variables needed for matching
#treated2: Treatment as a decrease in the number of actors
highcourts2<-highcourts[c("state", "year", "record_id", "treated2", 
                          "since_UNIVSFFRG", "v2x_partipdem_filled", "v2x_egaldem_filled", "v2xcl_rol_filled",	"v2x_gender_filled",	
                          "v2jupack_filled","v2csgender_filled" ,	"v2mefemjrn_filled",	"v2cldmovew_filled",	"v2cldiscw_filled",	"v2clslavef_filled",
                          "v2clprptyw_filled",	"v2clacjstw_filled",	"v2pepwrgen_filled" ,"drop_from_match2",
                          "percent_women_filled", "nominators_to_match2","selectors_to_match2")]

#merge DV
highcourts2<-merge(highcourts2, DV_data, by=c("state", "year"), all.x=TRUE)



highcourts2<-subset(highcourts2, has_there_ever_been_wom==0 | is.na(has_there_ever_been_wom))

##dropping observations with state id of 80 and 81 -- german democratic republic and german federal republic. Both had women by the 50s but merged in as germnay (state==82)
highcourts2<-subset(highcourts2, state!=80 & state!=81)

#dropping countries that shouldnt be mathces
highcourts2<-highcourts2[which(highcourts2$drop_from_match2!=1),] 
treated2_countries<-highcourts2[which(highcourts2$treated2==1),]

#Matching in two stages: first exact matching on year and pre-treatment institutions. 
#Then, we will use full match in the optmatch package to require matching within strata 

#subset to just the variables we need to exact match (cem isnt fond of NAs)
highcourts2_cem_subset<-highcourts2[, c("record_id", "nominators_to_match2", "selectors_to_match2", "year", "treated2", "drop_from_match2", "state")]
highcourts2_cem_subset<-na.omit(highcourts2_cem_subset) 

#generating bin cutpoints
yearcut<-c(1969.1, 1970.1, 1971.1, 1972.1, 1973.1, 1974.1, 1975.1, 1976.1, 1977.1, 1978.1, 1979.1, 1980.1, 1981.1, 1982.1, 1983.1, 1984.1, 1985.1, 1986.1, 1987.1, 1988.1, 1989.1, 1990.1, 1991.1, 1992.1, 1993.1, 1994.1, 1995.1, 1996.1, 1997.1, 1998.1, 1999.1, 2000.1, 2001.1, 2002.1, 2003.1, 2004.1, 2005.1, 2006.1, 2007.1, 2008.1, 2009.1, 2010.1)
nominatorscut<- c(-.1, .9, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1, 9.1) 
selectorscut<- c(-.1, .9, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1)

#coarsened exact matching
cem.2 <- matchit(treated2 ~ year + nominators_to_match2 + selectors_to_match2, 
                 data = highcourts2_cem_subset, method = "cem", 
                 cutpoints = list(year=yearcut, nominators_to_match2=nominatorscut, selectors_to_match2=selectorscut))


#treated2units matchd.
cem.2$nn #14 of 20 matched units


#cem.2  is only the years and countries that matched. 
cem.data2017_tr2 <- match.data(cem.2)







#merging other matching variables for the second stage of matching
cem.data2017_tr2<-merge(cem.data2017_tr2, highcourts2, by= c("record_id", "nominators_to_match2","selectors_to_match2", "year", "treated2", "drop_from_match2", "state"), all.x=TRUE) # n=203


#propensity score matching with optmatch
psm2<-glm(treated2~ percent_women_filled 
          +since_UNIVSFFRG   
          +v2jupack_filled
          +v2x_partipdem_filled
          +v2x_egaldem_filled
          + v2xcl_rol_filled
          +v2x_gender_filled
          +v2csgender_filled
          +v2mefemjrn_filled
          +v2cldmovew_filled
          +v2cldiscw_filled
          +v2clslavef_filled
          +v2clprptyw_filled
          +v2clacjstw_filled
          +v2pepwrgen_filled
          +strata(subclass), family="binomial", data=cem.data2017_tr2)

ps.pm2 <- pairmatch(psm2, data = cem.data2017_tr2)
#summary(ps.pm2)
#print(ps.pm, grouped=TRUE) 

#pulling matching indicator from optmatch into the dataframe
all.equal(names(ps.pm2), row.names(cem.data2017_tr2)) #TRUE
matched_pairs_tr2<-cbind(cem.data2017_tr2, matches=ps.pm2)

#Keeping version with all data for generating balance plot later
matched_pairs_tr2_full<-matched_pairs_tr2


#Making the matching indicator numeric so I can replace NAs with 0s
matched_pairs_tr2$matched <-as.numeric(matched_pairs_tr2$matches)
matched_pairs_tr2$matched[is.na(matched_pairs_tr2$matched)]<-0

#keeping only the matched pairs
just_matched_tr2<-matched_pairs_tr2[which(matched_pairs_tr2$matched!= 0),]


#reshape wide
wide_matched_tr2 <- reshape(just_matched_tr2, 
                            timevar = "treated2",
                            idvar = c("matched", "year"),
                            direction = "wide")

#generating years to woman variable
wide_matched_tr2$time_to_wom.1 <- wide_matched_tr2$year_first_wom.1 - wide_matched_tr2$year
wide_matched_tr2$time_to_wom.0<-wide_matched_tr2$year_first_wom.0-wide_matched_tr2$year
wide_matched_tr2$diff_treated_minus_control<-wide_matched_tr2$time_to_wom.1-wide_matched_tr2$time_to_wom.0

wide_matched_tr2<-wide_matched_tr2[,c("record_id.1", 
                                      "record_id.0", 
                                      "year_first_wom.1",
                                      "year_first_wom.0",
                                      "time_to_wom.1", 
                                      "time_to_wom.0",
                                      "diff_treated_minus_control",
                                      "matched",
                                      "year")]

long_matched_tr2<-reshape(wide_matched_tr2,
                          varying=c("record_id.1", 
                                    "record_id.0", 
                                    "year_first_wom.1",
                                    "year_first_wom.0",
                                    "time_to_wom.1", 
                                    "time_to_wom.0"),
                          idvar = c("matched", "year"),
                          direction="long")

long_matched_tr2<-rename(long_matched_tr2, treated2=time)
### Drop out the NAs
long_matched_tr2<-na.omit(long_matched_tr2)
long_matched_tr2_original<-long_matched_tr2 #for later analysis)
### Create a new column called "winner"
long_matched_tr2$winner<-"NA"
for (i in 1:nrow(long_matched_tr2))
{
  for (j in 1:nrow(long_matched_tr2))
  {
    if(j!=i)
    {
      if(long_matched_tr2$matched[i]==long_matched_tr2$matched[j])
      {
        if(long_matched_tr2$year_first_wom[i]<long_matched_tr2$year_first_wom[j])
        {
          long_matched_tr2$winner[i]<-1
        }
        else if(long_matched_tr2$year_first_wom[i]>long_matched_tr2$year_first_wom[j])
        {
          long_matched_tr2$winner[i]<-0
        }
        else if(long_matched_tr2$year_first_wom[i]==long_matched_tr2$year_first_wom[j])
        {
          long_matched_tr2$winner[i]<-"NA"
        }
        
      }
    }
  }
}

### Create a value called absolute value
long_matched_tr2$abs_val<-abs(long_matched_tr2$diff_treated_minus_control)

###Generate a column called between set rank
###Spilit into winner loser column
winner_tr2<-subset(long_matched_tr2,long_matched_tr2$winner==1)
loser_tr2<-subset(long_matched_tr2,long_matched_tr2$winner==0)
###Sort the subsets by "time_to_wom"
winner_tr2<-winner_tr2[rev(order(winner_tr2$time_to_wom)),]
loser_tr2<-loser_tr2[rev(order(loser_tr2$time_to_wom)),]
###Create the column called between set rank
winner_tr2$between_set_rank<-rank(winner_tr2$abs_val,ties.method="first")
loser_tr2$between_set_rank<-rank(loser_tr2$abs_val,ties.method="first")

###Merge the subsets again
all_tr2<-rbind(winner_tr2,loser_tr2)

###Create a column called pair and it is identical to between_set_rank
all_tr2$pair<-all_tr2$between_set_rank

###Sort the final dataset by pair
all_tr2_sorted<-all_tr2[order(all_tr2$pair),]
##need "winner" variable to be numeric
all_tr2_sorted$winner<-as.numeric(all_tr2_sorted$winner)

all_tr2_sorted<-all_tr2_sorted[, c("record_id", "treated2", "winner", "between_set_rank","pair")]

table(all_tr2_sorted$pair)

between_rank_vector1<-c(1:4)

long_rank_quade_tr2<-quade.prep(data=all_tr2_sorted, 
                                set="pair",
                                treatment="treated2",
                                withinRank = "winner",
                                unit="record_id",
                                betweenRank = between_rank_vector1)

qout<-quade(long_rank_quade_tr2)
qout

####Plot: Permutation Distribution####
#pdf("TR_dec_actor_exact_matched_4pair_jan2021s.pdf", height=5, width=5 ) 
par(mfrow=c(1,1), mar=c(5, 5, 5, 5), family="serif")
hist(qout$Q, main="Permuatation Distribution, Decrease in Actors", xlab="Signed Rank Statistic", col=NULL)
segments(qout$Qobs, 0, qout$Qobs, y1=7, lty = 2, lwd = 2, col = "red")
text(9,2.4, "p=.063")
mtext("Full Sample")
#dev.off()





####Moderate Pressure####

#pulling in the variable that is the percent_women in parliament (filled/permuted)
long_matched_tr2_moderate<-merge(long_matched_tr2, percent_wom, by="record_id", all.x=TRUE, all.y=FALSE)

#Defining moderate pressure as anything above the median at the start of our sample in 1970
summary(highcourts$percent_women_filled[which(highcourts$year==1970)],)
#median=2.8%

#resahpe to deterine which pairs have both countries above the moderate threshold cutoff
wide_matched_tr2_moderate <- reshape(long_matched_tr2_moderate, 
                                     timevar = "treated2",
                                     idvar = c("matched", "year", "diff_treated_minus_control", "abs_val"),
                                     direction = "wide")

wide_matched_tr2_moderate$moderate[which(wide_matched_tr2_moderate$percent_women_filled.0>2.8 & wide_matched_tr2_moderate$percent_women_filled.1>2.8)]<-1

#table(wide_matched_tr2_moderate$moderate, wide_matched_tr2_moderate$matched)
#2, 3, 13


wide_matched_tr2_moderate<-wide_matched_tr2_moderate[which(wide_matched_tr2_moderate$moderate==1),]
wide_matched_tr2_moderate$moderate<-NULL 
wide_matched_tr2_moderate$percent_women_filled.1<-NULL
wide_matched_tr2_moderate$percent_women_filled.0<-NULL


#reshape long
long_matched_tr2_moderate<- reshape(wide_matched_tr2_moderate,
                                    varying=c("record_id.0" ,
                                              "record_id.1",
                                              "time_to_wom.0",
                                              "time_to_wom.1" ,
                                              "winner.0",
                                              "winner.1",
                                              "year_first_wom.0",
                                              "year_first_wom.1"),
                                    idvar = c("abs_val","diff_treated_minus_control","matched","year" ),
                                    direction="long")

long_matched_tr2_moderate<-rename(long_matched_tr2_moderate, treated2=time)


##need to re-rank
long_matched_tr2_moderate$winner<-"NA"
for (i in 1:nrow(long_matched_tr2_moderate))
{
  for (j in 1:nrow(long_matched_tr2_moderate))
  {
    if(j!=i)
    {
      if(long_matched_tr2_moderate$matched[i]==long_matched_tr2_moderate$matched[j])
      {
        if(long_matched_tr2_moderate$year_first_wom[i]<long_matched_tr2_moderate$year_first_wom[j])
        {
          long_matched_tr2_moderate$winner[i]<-1
        }
        else if(long_matched_tr2_moderate$year_first_wom[i]>long_matched_tr2_moderate$year_first_wom[j])
        {
          long_matched_tr2_moderate$winner[i]<-0
        }
        else if(long_matched_tr2_moderate$year_first_wom[i]==long_matched_tr2_moderate$year_first_wom[j])
        {
          long_matched_tr2_moderate$winner[i]<-"NA"
        }
        
      }
    }
  }
}

### Create a value called absolute value
long_matched_tr2_moderate$abs_val<-abs(long_matched_tr2_moderate$diff_treated_minus_control)

###Generate a column called between set rank
###Spilit into winner loser column
winner_tr2_moderate<-subset(long_matched_tr2_moderate,long_matched_tr2_moderate$winner==1)
loser_tr2_moderate<-subset(long_matched_tr2_moderate,long_matched_tr2_moderate$winner==0)
###Sort the subsets by "time_to_wom"
winner_tr2_moderate<-winner_tr2_moderate[rev(order(winner_tr2_moderate$time_to_wom)),]
loser_tr2_moderate<-loser_tr2_moderate[rev(order(loser_tr2_moderate$time_to_wom)),]
###Create the column called between set rank
winner_tr2_moderate$between_set_rank<-rank(winner_tr2_moderate$abs_val,ties.method="first")
loser_tr2_moderate$between_set_rank<-rank(loser_tr2_moderate$abs_val,ties.method="first")

###Merge the subsets again
all_tr2_moderate<-rbind(winner_tr2_moderate,loser_tr2_moderate)

###Create a column called pair and it is identical to between_set_rank
all_tr2_moderate$pair<-all_tr2_moderate$between_set_rank

###Sort the final dataset by pair
all_tr2_sorted_moderate<-all_tr2_moderate[order(all_tr2_moderate$pair),]
##need "winner" variable to be numeric
all_tr2_sorted_moderate$winner<-as.numeric(all_tr2_sorted_moderate$winner)


all_tr2_sorted_moderate<-all_tr2_sorted_moderate[, c("record_id", "treated2", "winner", "between_set_rank","pair")]
table(all_tr2_sorted_moderate$pair)
between_rank_vector1_m<-c(1:3)

long_rank_quade_tr2_moderate<-quade.prep(data=all_tr2_sorted_moderate, 
                                         set="pair",
                                         treatment="treated2",
                                         withinRank = "winner",
                                         unit="record_id",
                                         betweenRank = between_rank_vector1_m)

qout<-quade(long_rank_quade_tr2_moderate)
qout
####Plot: Permutation Dostribution####
#permutation distribution plot
#pdf("Tr_dec_exact_matched_moderate_pres_3pairs_jan2021.pdf", height=5, width=5 ) 
par(mfrow=c(1,1), mar=c(5, 5, 5, 5), family="serif")
hist(qout$Q, main="Permuatation Distribution, Decrease in Actors", xlab="Signed Rank Statistic", col=NULL)
segments(qout$Qobs, 0, qout$Qobs, y1=7, lty = 2, lwd = 2, col = "red")
text(5,2.2, "p=.125")
mtext("Moderate Pressure Sample")
#dev.off()


####Balance Plot: Decrease in actors####
#make matched pairs numeric
matched_pairs_tr2_full$matched<-as.numeric(matched_pairs_tr2_full$matches)
matched_pairs_tr2_full$matched[is.na(matched_pairs_tr2_full$matched)] <- 0

#need a vector of the variables that we need
var_names<-   c("year",
                "nominators_to_match2",
                "selectors_to_match2", 
                "percent_women_filled",
                "since_UNIVSFFRG",
                "v2jupack_filled",
                "v2cldiscw_filled",
                "v2clprptyw_filled",
                "v2csgender_filled",
                "v2pepwrgen_filled",
                "v2x_gender_filled",
                "v2xcl_rol_filled",
                "v2clacjstw_filled",
                "v2cldmovew_filled",
                "v2clslavef_filled",
                "v2mefemjrn_filled",
                "v2x_egaldem_filled",
                "v2x_partipdem_filled")
matched_sdm<-NULL
all_sdm<-NULL
balance<-as.data.frame(cbind(var_names, matched_sdm, all_sdm))


for (i in 1:length(var_names)){
  balance$matched_sdm[i]<- (((mean(na.omit(matched_pairs_tr2_full[matched_pairs_tr2_full$treated2==1, var_names[i]]))) 
                             -
                               (mean(na.omit(matched_pairs_tr2_full[matched_pairs_tr2_full$treated2==0 & matched_pairs_tr2_full$matched!=0, var_names[i]]))))
                            /
                              (sqrt(var(na.omit(matched_pairs_tr2_full[matched_pairs_tr2_full$matched!=0, var_names[i]])))))
}

for (i in 1:length(var_names)){
  balance$all_sdm[i]<- (((mean(na.omit(highcourts[highcourts$treated2==1, var_names[i]]))) 
                         -
                           (mean(na.omit(highcourts[highcourts$treated2==0, var_names[i]]))))
                        /
                          (sqrt(var(na.omit(highcourts[, var_names[i]])))))
}


balance_variables<-c("Year", 
                     "Number of Nominators", 
                     "Number of Selectors",
                     "Percent Women in Parliament", 
                     "Years since Univ. Suffrage", 
                     "Court Packing Index", 
                     "Freedom of Discussion for Women", 
                     "Property Rights for Women", 
                     "Women's Civil Soc. Participation", 
                     "Power Distributed by Gender",
                     "Women Political Empowerment", 
                     "Equality Before the Law", 
                     "Access to Justice for Women", 
                     "Freedom Domestic Mvmt for Women", 
                     "Freedom from Forced Labor for Women", 
                     "Female Journalists", 
                     "Egalitarian Democracy Index",
                     "Participatory  Democracy Index" 
)




# ####graphs
# # PLOT #
#pdf("Balance_Plot_Tretment_as_Decrease_jan2021.pdf", height=5, width=8 ) 
par(mfrow=c(1,1), mar=c(5, 18, 5, 5), family="serif")
y.axis <- c(length(balance$matched_sdm):1)
plot(balance$matched_sdm, y.axis,
     type = "p",
     axes = FALSE,
     xlab = "",
     ylab = "",
     pch = 20,
     cex = 1.2,
     xlim = c(-1,1),
     main = "Standardized Difference in Means",
     sub="Difference between Treated and Untreated Means"
)

segments(0, 0, x1=0, y1=19, lty = 2, lwd = 2, col = "grey")
axis(2, at = y.axis, label = balance_variables, las = 1, tick = T, mgp = c(2,.6,0), cex.axis = 1)
axis(1, mgp = c(0,.6,.75))
points(balance$all_sdm, y.axis, pch=1, cex=1)
legend(x= .4, y=3, c("Matched Data", "All Data"), pch=c(20, 1), cex=.8,  pt.cex=.8, bty = "n")
mtext("Balance, Treatment as Decrease")
#dev.off()



####ANALYSIS 3 (Inc or Dec, matched event type):####
#Treatment as an increase or decrease
#### Two stage matching, exact matched on event type and year

#subset of variables needed for matching
#treated3: Defining Treatment as an increase or a decrease in the number of actors
highcourts3<-highcourts[c("state", "year", "record_id","treated3", 
                          "since_UNIVSFFRG", "v2x_partipdem_filled", "v2x_egaldem_filled", "v2xcl_rol_filled",	"v2x_gender_filled",	
                          "v2jupack_filled","v2csgender_filled" ,	"v2mefemjrn_filled",	"v2cldmovew_filled",	"v2cldiscw_filled",	"v2clslavef_filled",
                          "v2clprptyw_filled",	"v2clacjstw_filled",	"v2pepwrgen_filled" ,"drop_from_match3",
                          "percent_women_filled","nominators_to_match3","selectors_to_match3", "type_of_event")]



#merge DV
highcourts3<-merge(highcourts3, DV_data, by=c("state", "year"), all.x=TRUE)

#Dropping observations where we know there has been a woman already.
#So keeping observations where we know there hasn't been a woman or data is missing
#Missingness for has_there_ever_been_wom more likely to stem from missing confirmation that there has NOT been a woman than missing the presence of a woman, so treating NAs as no woman yet.

highcourts3<-subset(highcourts3, has_there_ever_been_wom==0 | is.na(has_there_ever_been_wom))
#table(highcourts3$treated3)

##need to drop observations with state id of 80 and 81 -- german democratic republic and german federal republic. Both had women by the 50s but merged in as germnay (state==82)
highcourts3<-subset(highcourts3, state!=80 & state!=81)

#dropping countries that shouldn't be matches
highcourts3<-highcourts3[which(highcourts3$drop_from_match3!=1),] 


#Matching in two stages: first exact matching on year and constitutional event type. 
#Then, we will use full match in the optmatch package to require matching within strata 


#highcourtsclean name of full data set, going to subset to just the variables we need to exact match b/c cem does NOT like NAs
highcourts3_cem_subset<-highcourts3[, c("record_id", "type_of_event", "year", "treated3", "drop_from_match3", "state")]


highcourts3_cem_subset<-na.omit(highcourts3_cem_subset) 

#table(highcourts3_cem_subset$treated3)
#generating bin cutpoints
yearcut<-c(1969.1, 1970.1, 1971.1, 1972.1, 1973.1, 1974.1, 1975.1, 1976.1, 1977.1, 1978.1, 1979.1, 1980.1, 1981.1, 1982.1, 1983.1, 1984.1, 1985.1, 1986.1, 1987.1, 1988.1, 1989.1, 1990.1, 1991.1, 1992.1, 1993.1, 1994.1, 1995.1, 1996.1, 1997.1, 1998.1, 1999.1, 2000.1, 2001.1, 2002.1, 2003.1, 2004.1, 2005.1, 2006.1, 2007.1, 2008.1, 2009.1, 2010.1)
eventtypecut<- c(0, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1) 

#table(highcourts3_cem_subset$type_of_event, highcourts3_cem_subset$treated)
#1=New Constitution
#2=amendment
#3=interim constitution
#4= reinstated
#5=suspension
#6=non-event
#7=missing

#coarsened exact matching
cem.3 <- matchit(treated3 ~ year + type_of_event, 
                 data = highcourts3_cem_subset, method = "cem", 
                 cutpoints = list(year=yearcut, type_of_event=eventtypecut))


#treated3units matched.
cem.3$nn #39 of 42 


#the cem.3 data is only the years and countries that matched. 
#saving as a data set
cem.data2019_tr3 <- match.data(cem.3)

table(cem.data2019_tr3$subclass, cem.data2019_tr3$treated3)

#merging other matching variables for the second stage PLM matching
cem.data2019_tr3<-merge(cem.data2019_tr3, highcourts3, by= c("record_id", "type_of_event", "year", "treated3", "drop_from_match3", "state"), all.x=TRUE) 

#library(optmatch)
#library(RItools)

#propensity score matching with optmatch
psm3<-glm(treated3~ percent_women_filled 
          +since_UNIVSFFRG   
          +v2jupack_filled
          +v2x_partipdem_filled
          +v2x_egaldem_filled
          + v2xcl_rol_filled
          +v2x_gender_filled
          +v2csgender_filled
          +v2mefemjrn_filled
          +v2cldmovew_filled
          +v2cldiscw_filled
          +v2clslavef_filled
          +v2clprptyw_filled
          +v2clacjstw_filled
          +v2pepwrgen_filled
          +strata(subclass), family="binomial", data=cem.data2019_tr3)

ps.pm3 <- pairmatch(psm3, data = cem.data2019_tr3)
#summary(ps.pm3)
#print(ps.pm3, grouped=TRUE) 

#pulling matching indicator from optmatch into the dataframe
#all.equal(names(ps.pm3), row.names(cem.data2019_tr3)) #TRUE
matched_pairs_tr3<-cbind(cem.data2019_tr3, matches=ps.pm3)

#Making the matching indicator numeric so I can replace NAs with 0 (so 0= not matched)
matched_pairs_tr3$matched <-as.numeric(matched_pairs_tr3$matches)
matched_pairs_tr3$matched[is.na(matched_pairs_tr3$matched)]<-0

#only want the matched pairs; drop out countries that didnt match
just_matched_tr3<-matched_pairs_tr3[which(matched_pairs_tr3$matched!= 0),]

#For countries who have not yet had a woman, replacing year of first woman as 2017 (current year at time of analysis) in order to calculate the time from treatment until the first woman.
just_matched_tr3$year_first_wom[which(just_matched_tr3$year_first_wom==9999)]<-2017.0

#reshape wide
wide_matched_tr3 <- reshape(just_matched_tr3, 
                            timevar = "treated3",
                            idvar = c("matched", "year"),
                            direction = "wide")


#generating years to woman variable
wide_matched_tr3$time_to_wom.1 <- wide_matched_tr3$year_first_wom.1 - wide_matched_tr3$year
wide_matched_tr3$time_to_wom.0<-wide_matched_tr3$year_first_wom.0-wide_matched_tr3$year
wide_matched_tr3$diff_treated_minus_control<-wide_matched_tr3$time_to_wom.1-wide_matched_tr3$time_to_wom.0
wide_matched_tr3<-wide_matched_tr3[,c("record_id.1", 
                                      "record_id.0", 
                                      "year_first_wom.1",
                                      "year_first_wom.0",
                                      "time_to_wom.1", 
                                      "time_to_wom.0",
                                      "diff_treated_minus_control",
                                      "matched",
                                      "year")]

long_matched_tr3<-reshape(wide_matched_tr3,
                          varying=c("record_id.1", 
                                    "record_id.0", 
                                    "year_first_wom.1",
                                    "year_first_wom.0",
                                    "time_to_wom.1", 
                                    "time_to_wom.0"),
                          idvar = c("matched", "year"),
                          direction="long")

long_matched_tr3<-rename(long_matched_tr3, treated3=time)

### Drop out the NAs
long_matched_tr3<-na.omit(long_matched_tr3)

### Create a new column called "winner"
long_matched_tr3$winner<-"NA"
for (i in 1:nrow(long_matched_tr3))
{
  for (j in 1:nrow(long_matched_tr3))
  {
    if(j!=i)
    {
      if(long_matched_tr3$matched[i]==long_matched_tr3$matched[j])
      {
        if(long_matched_tr3$year_first_wom[i]<long_matched_tr3$year_first_wom[j])
        {
          long_matched_tr3$winner[i]<-1
        }
        else if(long_matched_tr3$year_first_wom[i]>long_matched_tr3$year_first_wom[j])
        {
          long_matched_tr3$winner[i]<-0
        }
        else if(long_matched_tr3$year_first_wom[i]==long_matched_tr3$year_first_wom[j])
        {
          long_matched_tr3$winner[i]<-"NA"
        }
        
      }
    }
  }
}

### Create a value called absolute value
long_matched_tr3$abs_val<-abs(long_matched_tr3$diff_treated_minus_control)


###Generate a column called between set rank
###Split into winner loser column
winner_tr3<-subset(long_matched_tr3,long_matched_tr3$winner==1)
loser_tr3<-subset(long_matched_tr3,long_matched_tr3$winner==0)

###Sort the subsets by "time_to_wom"
winner_tr3<-winner_tr3[rev(order(winner_tr3$time_to_wom)),]
loser_tr3<-loser_tr3[rev(order(loser_tr3$time_to_wom)),]

###Create the column called between set rank
winner_tr3$between_set_rank<-rank(winner_tr3$abs_val,ties.method="first")
loser_tr3$between_set_rank<-rank(loser_tr3$abs_val,ties.method="first")

###Merge the subsets again
all_tr3<-rbind(winner_tr3,loser_tr3)

###Create a column called pair and it is identical to between_set_rank
all_tr3$pair<-all_tr3$between_set_rank

###Sort the final dataset by pair
all_tr3_sorted<-all_tr3[order(all_tr3$pair),]
##need "winner" variable to be numeric
all_tr3_sorted$winner<-as.numeric(all_tr3_sorted$winner)

####Qual CI
#library("qualCI")

all_tr3_sorted<-all_tr3_sorted[, c("record_id", "treated3", "winner", "between_set_rank","pair")]

table(all_tr3_sorted$pair)
#set vector length to number of pairs
between_rank_vector1<-c(1:19)


long_rank_quade_tr3<-quade.prep(data=all_tr3_sorted, 
                                set="pair",
                                treatment="treated3",
                                withinRank = "winner",
                                unit="record_id",
                                betweenRank = between_rank_vector1)

qout<-quade(long_rank_quade_tr3)
qout

####Plot: Permutation Distribution####
#pdf("TR_inc_dec_matched_event_19pairs_jan2021.pdf", height=6, width=5 ) 
par(mfrow=c(1,1), mar=c(5, 5, 5, 5), family="serif")
hist(qout$Q, main="Permuatation Distribution", xlab="Signed Rank Statistic", col=NULL)
segments(qout$Qobs, 0, qout$Qobs, y1=80000, lty = 2, lwd = 2, col = "red")
text(140,65000, "p=.0017")
#text(15, 6000, "Full Sample")
#text(15, 5600, "19 Pairs")
mtext("Increase or Decrease, Matched on Event, Full Sample")
#dev.off()


####Balance: Inc or Dec####

#need a vector of the variables that we need
var_names<-   c("year",
                "nominators_to_match3",
                "selectors_to_match3", 
                "percent_women_filled",
                "since_UNIVSFFRG",
                "v2jupack_filled",
                "v2cldiscw_filled",
                "v2clprptyw_filled",
                "v2csgender_filled",
                "v2pepwrgen_filled",
                "v2x_gender_filled",
                "v2xcl_rol_filled",
                "v2clacjstw_filled",
                "v2cldmovew_filled",
                "v2clslavef_filled",
                "v2mefemjrn_filled",
                "v2x_egaldem_filled",
                "v2x_partipdem_filled")
matched_sdm<-NULL
all_sdm<-NULL
balance<-as.data.frame(cbind(var_names, matched_sdm, all_sdm))


for (i in 1:length(var_names)){
  balance$matched_sdm[i]<- (((mean(na.omit(matched_pairs_tr3[matched_pairs_tr3$treated3==1, var_names[i]]))) 
                             -
                               (mean(na.omit(matched_pairs_tr3[matched_pairs_tr3$treated3==0 & matched_pairs_tr3$matched!=0, var_names[i]]))))
                            /
                              (sqrt(var(na.omit(matched_pairs_tr3[matched_pairs_tr3$matched!=0, var_names[i]])))))
}

for (i in 1:length(var_names)){
  balance$all_sdm[i]<- (((mean(na.omit(highcourts[highcourts$treated3==1, var_names[i]]))) 
                         -
                           (mean(na.omit(highcourts[highcourts$treated3==0, var_names[i]]))))
                        /
                          (sqrt(var(na.omit(highcourts[, var_names[i]])))))
}


balance_variables<-c("Year", 
                     "Number of Nominators", 
                     "Number of Selectors",
                     "Percent Women in Parliament", 
                     "Years since Univ. Suffrage", 
                     "Court Packing Index", 
                     "Freedom of Discussion for Women", 
                     "Property Rights for Women", 
                     "Women's Civil Soc. Participation", 
                     "Power Distributed by Gender",
                     "Women Political Empowerment", 
                     "Equality Before the Law", 
                     "Access to Justice for Women", 
                     "Freedom Domestic Mvmt for Women", 
                     "Freedom from Forced Labor for Women", 
                     "Female Journalists", 
                     "Egalitarian Democracy Index",
                     "Participatory  Democracy Index" 
)




# ####graphs
# # PLOT #
#pdf("Balance_Plot_Tretment_as_Inc_or_Dec_jan2021.pdf", height=5, width=8 ) 
par(mfrow=c(1,1), mar=c(5, 18, 5, 5), family="serif")
y.axis <- c(length(balance$matched_sdm):1)
plot(balance$matched_sdm, y.axis,
     type = "p",
     axes = FALSE,
     xlab = "",
     ylab = "",
     pch = 20,
     cex = 1.2,
     xlim = c(-1,1),
     main = "Standardized Difference in Means",
     sub="Difference between Treated and Untreated Means"
)

segments(0, 0, x1=0, y1=19, lty = 2, lwd = 2, col = "grey")
axis(2, at = y.axis, label = balance_variables, las = 1, tick = T, mgp = c(2,.6,0), cex.axis = 1)
axis(1, mgp = c(0,.6,.75))
points(balance$all_sdm, y.axis, pch=1, cex=1)
legend(x= .4, y=3, c("Matched Data", "All Data"), pch=c(20, 1), cex=.8,  pt.cex=.8, bty = "n")
mtext("Balance, Treatment as Increase or Decrease")
#dev.off()



####ANALYSIS 4 Any change in process####
#Treatment here is defined as any change in the selection process. Exact matched on year and event type


#subset of variables needed for matching
#Treatment11: all changes broadly defined: flowchart_change==1
highcourts11<-highcourts[c("state", "year", "record_id", "treated11",
                           "since_UNIVSFFRG", "v2x_partipdem_filled", "v2x_egaldem_filled", "v2xcl_rol_filled",	"v2x_gender_filled",	
                           "v2jupack_filled","v2csgender_filled" ,	"v2mefemjrn_filled",	"v2cldmovew_filled",	"v2cldiscw_filled",	"v2clslavef_filled",
                           "v2clprptyw_filled",	"v2clacjstw_filled",	"v2pepwrgen_filled" , "drop_from_match11", "percent_women_filled", "type_of_event")]


#merge DV
highcourts11<-merge(highcourts11, DV_data, by=c("state", "year"), all.x=TRUE)

#Dropping observations where we know there has been a woman already.
#So keeping observations where we know there hasn't been a woman or data is missing
#Missingness for has_there_ever_been_wom more likely to stem from missing confirmation that there has NOT been a woman than missing the presence of a woman, so treating NAs as no woman yet.

highcourts11<-subset(highcourts11, has_there_ever_been_wom==0 | is.na(has_there_ever_been_wom))
#table(highcourts11$treated11)
# 79 treated11countries,  35 second treated11, which means that the country was treated twice in our time frame (will drop out with the drop from match value)

##need to drop observations with state id of 80 and 81 -- german democratic republic and german federal republic. Booh had women by the 50s but merged in as germnay (state==82)
highcourts11<-subset(highcourts11, state!=80 & state!=81) #two treated units drop

#dropping countries that shouldn't be matches
highcourts11<-highcourts11[which(highcourts11$drop_from_match11!=1),] 

#Matching in two stages: first exact matching on year and constitutional event type. 
#Then, we will use full match in the optmatch package to require matching within strata 

#highcourtsclean name of full data set, going to subset to just the variables we need to exact match b/c cem does NOT like NAs
highcourts11_cem_subset<-highcourts11[, c("record_id", "type_of_event", "year", "treated11", "drop_from_match11", "state")]
highcourts11_cem_subset<-na.omit(highcourts11_cem_subset) 

#table(highcourts11_cem_subset$type_of_event, highcourts11_cem_subset$treated11)

#generating bin cutpoints
yearcut<-c(1969.1, 1970.1, 1971.1, 1972.1, 1973.1, 1974.1, 1975.1, 1976.1, 1977.1, 1978.1, 1979.1, 1980.1, 1981.1, 1982.1, 1983.1, 1984.1, 1985.1, 1986.1, 1987.1, 1988.1, 1989.1, 1990.1, 1991.1, 1992.1, 1993.1, 1994.1, 1995.1, 1996.1, 1997.1, 1998.1, 1999.1, 2000.1, 2001.1, 2002.1, 2003.1, 2004.1, 2005.1, 2006.1, 2007.1, 2008.1, 2009.1, 2010.1)
eventtypecut<- c(0, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1) 

#coarsened exact matching
cem.11 <- matchit(treated11 ~ year + type_of_event, 
                  data = highcourts11_cem_subset, method = "cem", 
                  cutpoints = list(year=yearcut, type_of_event=eventtypecut))


#treated4units matched.
cem.11$nn #60 of 77 matched units

#the cem.4 data is only the years and countries that matched. 
#saving as a data set
cem.data2019_tr11 <- match.data(cem.11)

#table(cem.data2019_tr11$subclass, cem.data2019_tr11$treated11)

#merging other matching variables for the second stage PLM matching
cem.data2019_tr11<-merge(cem.data2019_tr11, highcourts11, by= c("record_id", "type_of_event", "year", "treated11", "drop_from_match11", "state"), all.x=TRUE) 


#library(optmatch)
#library(RItools)
#propensity score matching with optmatch
psm11<-glm(treated11~ percent_women_filled 
           +since_UNIVSFFRG   
           +v2jupack_filled
           +v2x_partipdem_filled
           +v2x_egaldem_filled
           + v2xcl_rol_filled
           +v2x_gender_filled
           +v2csgender_filled
           +v2mefemjrn_filled
           +v2cldmovew_filled
           +v2cldiscw_filled
           +v2clslavef_filled
           +v2clprptyw_filled
           +v2clacjstw_filled
           +v2pepwrgen_filled
           +strata(subclass), family="binomial", data=cem.data2019_tr11)

ps.pm11 <- pairmatch(psm11, data = cem.data2019_tr11)
#summary(ps.pm11) #55 of 60 treated countries matched
#print(ps.pm11, grouped=TRUE) 

#pulling matching indicator from optmatch into the dataframe
all.equal(names(ps.pm11), row.names(cem.data2019_tr11)) #TRUE
matched_pairs_tr11<-cbind(cem.data2019_tr11, matches=ps.pm11)


###55 treated pairs is too many for the Qual CI package; will use wilcoxwn test instead. 33 pairs with dv data for both


#Making the matching indicator numeric so I can replace NAs with 0 (so 0= not matched)
matched_pairs_tr11$matched <-as.numeric(matched_pairs_tr11$matches)
matched_pairs_tr11$matched[is.na(matched_pairs_tr11$matched)]<-0


#only want the matched pairs; drop out countries that didnt match
just_matched_tr11<-matched_pairs_tr11[which(matched_pairs_tr11$matched!= 0),]

#For countries who have not yet had a woman, replacing year of first woman as 2017 (current year at time of analysis) in order to calculate the time from treatment until the first woman.
just_matched_tr11$year_first_wom[which(just_matched_tr11$year_first_wom==9999)]<-2017.0

#reshape wide
wide_matched_tr11 <- reshape(just_matched_tr11, 
                             timevar = "treated11",
                             idvar = c("matched", "year"),
                             direction = "wide")


#generating years to woman variable
wide_matched_tr11$time_to_wom.1 <- wide_matched_tr11$year_first_wom.1 - wide_matched_tr11$year
wide_matched_tr11$time_to_wom.0<-wide_matched_tr11$year_first_wom.0-wide_matched_tr11$year
wide_matched_tr11$diff_treated_minus_control<-wide_matched_tr11$time_to_wom.1-wide_matched_tr11$time_to_wom.0

wide_matched_tr11<-wide_matched_tr11[,c("record_id.1", 
                                        "record_id.0", 
                                        "year_first_wom.1",
                                        "year_first_wom.0",
                                        "time_to_wom.1", 
                                        "time_to_wom.0",
                                        "diff_treated_minus_control",
                                        "matched",
                                        "year")]

### Drop out the NAs
wide_matched_tr11<-na.omit(wide_matched_tr11)

##55 matched pairs, 33 with DV data for both pairs: too much for Qual CI to handle. Will use wilcoxen test instead


####wilcox.test####
wilcox.test(wide_matched_tr11$time_to_wom.1, wide_matched_tr11$time_to_wom.0, paired=TRUE)
wilcox.test(wide_matched_tr11$time_to_wom.1, wide_matched_tr11$time_to_wom.0, paired=TRUE, alternative = "less")

#p-value = 0.07983
#one sided: .03991

#what is the average difference in time to woman between treated and control?
summary(wide_matched_tr11$time_to_wom.0) #17.52 years for control countries/ 18 median
summary(wide_matched_tr11$time_to_wom.1) #13.82 years for treated countries/ 10 median


####Balance: Any Change####

#need a vector of the variables that we need
var_names<-   c("year",
                "percent_women_filled",
                "since_UNIVSFFRG",
                "v2jupack_filled",
                "v2cldiscw_filled",
                "v2clprptyw_filled",
                "v2csgender_filled",
                "v2pepwrgen_filled",
                "v2x_gender_filled",
                "v2xcl_rol_filled",
                "v2clacjstw_filled",
                "v2cldmovew_filled",
                "v2clslavef_filled",
                "v2mefemjrn_filled",
                "v2x_egaldem_filled",
                "v2x_partipdem_filled")
matched_sdm<-NULL
all_sdm<-NULL
balance<-as.data.frame(cbind(var_names, matched_sdm, all_sdm))


for (i in 1:length(var_names)){
  balance$matched_sdm[i]<- (((mean(na.omit(just_matched_tr11[just_matched_tr11$treated11==1, var_names[i]]))) 
                             -
                               (mean(na.omit(just_matched_tr11[just_matched_tr11$treated11==0 & just_matched_tr11$matched!=0, var_names[i]]))))
                            /
                              (sqrt(var(na.omit(just_matched_tr11[just_matched_tr11$matched!=0, var_names[i]])))))
}

for (i in 1:length(var_names)){
  balance$all_sdm[i]<- (((mean(na.omit(highcourts[highcourts$treated11==1, var_names[i]]))) 
                         -
                           (mean(na.omit(highcourts[highcourts$treated11==0, var_names[i]]))))
                        /
                          (sqrt(var(na.omit(highcourts[, var_names[i]])))))
}


balance_variables<-c("Year", 
                     "Percent Women in Parliament", 
                     "Years since Univ. Suffrage", 
                     "Court Packing Index", 
                     "Freedom of Discussion for Women", 
                     "Property Rights for Women", 
                     "Women's Civil Soc. Participation", 
                     "Power Distributed by Gender",
                     "Women Political Empowerment", 
                     "Equality Before the Law", 
                     "Access to Justice for Women", 
                     "Freedom Domestic Mvmt for Women", 
                     "Freedom from Forced Labor for Women", 
                     "Female Journalists", 
                     "Egalitarian Democracy Index",
                     "Participatory  Democracy Index" 
)




# ####graphs
# # PLOT #
#pdf("Balance_Plot_Tretment_as_any_change_jan2021.pdf", height=5, width=8 ) 
par(mfrow=c(1,1), mar=c(5, 18, 5, 5), family="serif")
y.axis <- c(length(balance$matched_sdm):1)
plot(balance$matched_sdm, y.axis,
     type = "p",
     axes = FALSE,
     xlab = "",
     ylab = "",
     pch = 20,
     cex = 1.2,
     xlim = c(-1,1),
     main = "Standardized Difference in Means",
     sub="Difference between Treated and Untreated Means"
)

segments(0, 0, x1=0, y1=19, lty = 2, lwd = 2, col = "grey")
axis(2, at = y.axis, label = balance_variables, las = 1, tick = T, mgp = c(2,.6,0), cex.axis = 1)
axis(1, mgp = c(0,.6,.75))
points(balance$all_sdm, y.axis, pch=1, cex=1)
legend(x= .4, y=16, c("Matched Data", "All Data"), pch=c(20, 1), cex=.8,  pt.cex=.8, bty = "n")
mtext("Balance, Treatment as any change")
#dev.off()



####Footnote 19: Analysis 1 +Analysis 2, recalculate sign rank statistic####
objects(long_matched_tr1_original)
objects(long_matched_tr2_original)
long_matched_tr2_original<-rename(long_matched_tr2_original, treated_combined=treated2)
long_matched_tr1_original<-rename(long_matched_tr1_original, treated_combined=treated1)
long_matched_tr2_original$matched<-long_matched_tr2_original$matched +.1
long_matched_combined<-rbind(long_matched_tr1_original, long_matched_tr2_original)


##need to re-rank
long_matched_combined$winner<-"NA"
for (i in 1:nrow(long_matched_combined))
{
  for (j in 1:nrow(long_matched_combined))
  {
    if(j!=i)
    {
      if(long_matched_combined$matched[i]==long_matched_combined$matched[j])
      {
        if(long_matched_combined$year_first_wom[i]<long_matched_combined$year_first_wom[j])
        {
          long_matched_combined$winner[i]<-1
        }
        else if(long_matched_combined$year_first_wom[i]>long_matched_combined$year_first_wom[j])
        {
          long_matched_combined$winner[i]<-0
        }
        else if(long_matched_combined$year_first_wom[i]==long_matched_combined$year_first_wom[j])
        {
          long_matched_combined$winner[i]<-"NA"
        }
        
      }
    }
  }
}

### Create a value called absolute value
long_matched_combined$abs_val<-abs(long_matched_combined$diff_treated_minus_control)

###Generate a column called between set rank
###Spilt into winner loser column
winner_tr_combined<-subset(long_matched_combined,long_matched_combined$winner==1)
loser_tr_combined<-subset(long_matched_combined,long_matched_combined$winner==0)
###Sort the subsets by "time_to_wom"
winner_tr_combined<-winner_tr_combined[rev(order(winner_tr_combined$time_to_wom)),]
loser_tr_combined<-loser_tr_combined[rev(order(loser_tr_combined$time_to_wom)),]
###Create the column called between set rank
winner_tr_combined$between_set_rank<-rank(winner_tr_combined$abs_val,ties.method="first")
loser_tr_combined$between_set_rank<-rank(loser_tr_combined$abs_val,ties.method="first")

###Merge the subsets again
all_tr_combined<-rbind(winner_tr_combined,loser_tr_combined)

###Create a column called pair and it is identical to between_set_rank
all_tr_combined$pair<-all_tr_combined$between_set_rank

###Sort the final dataset by pair
all_tr_combined_sorted<-all_tr_combined[order(all_tr_combined$pair),]
##need "winner" variable to be numeric
all_tr_combined_sorted$winner<-as.numeric(all_tr_combined_sorted$winner)


####Qual CI
#library("qualCI")
all_tr_combined_sorted<-all_tr_combined_sorted[, c("record_id", "treated_combined", "winner", "between_set_rank","pair")]
table(all_tr_combined_sorted$pair)
between_rank_vector1<-c(1:18)

long_rank_quade_tr_combined<-quade.prep(data=all_tr_combined_sorted, 
                                        set="pair",
                                        treatment="treated_combined",
                                        withinRank = "winner",
                                        unit="record_id",
                                        betweenRank = between_rank_vector1)

qout<-quade(long_rank_quade_tr_combined)
qout


####APPENDIX Increase in number of *nominators*####
#Exact matched on year and pre-change institution
highcourt_inc_nom<-highcourts[c("state", "year", "record_id", "treated_inc_nom",
                                "since_UNIVSFFRG", "v2x_partipdem_filled", "v2x_egaldem_filled", "v2xcl_rol_filled",	"v2x_gender_filled",	
                                "v2jupack_filled","v2csgender_filled" ,	"v2mefemjrn_filled",	"v2cldmovew_filled",	"v2cldiscw_filled",	"v2clslavef_filled",
                                "v2clprptyw_filled",	"v2clacjstw_filled",	"v2pepwrgen_filled" , "drop_from_match_inc_nom", "percent_women_filled", "nominators_to_match_inc_nom","selectors_to_match_inc_nom")]

#merge
highcourt_inc_nom<-merge(highcourt_inc_nom, DV_data, by=c("state", "year"), all.x=TRUE)
#table(highcourt_inc_nom$has_there_ever_been_wom)

highcourt_inc_nom<-subset(highcourt_inc_nom, has_there_ever_been_wom==0 | is.na(has_there_ever_been_wom))#7812 
#table(highcourt_inc_nom$treated_inc_nom) 


##need to drop observations with state id of 80 and 81 -- german democratic republic and german federal republic. Booh had women by the 50s but merged in as germnay (state==82)
highcourt_inc_nom<-subset(highcourt_inc_nom, state!=80 & state!=81)



#dropping countries that shouldn't be matches
highcourt_inc_nom<-highcourt_inc_nom[which(highcourt_inc_nom$drop_from_match_inc_nom!=1),] 
#table_treated_tr_inc_nom<-highcourt_inc_nom[which(highcourt_inc_nom$treated_inc_nom==1), "record_id"]

#Matching in two stages: first exact matching on year and pre-treatment institutions. 
#Then, we will use full match in the optmatch package to require matching within strata 

#highcourtsclean name of full data set, going to subset to just the variables we need to exact match b/c cem does NOT like NAs
highcourt_inc_nom_cem_subset<-highcourt_inc_nom[, c("record_id", "nominators_to_match_inc_nom", "selectors_to_match_inc_nom", "year", "treated_inc_nom", "drop_from_match_inc_nom", "state")]
highcourt_inc_nom_cem_subset<-na.omit(highcourt_inc_nom_cem_subset)

##treatment variable needs to be 0,1. Right now I have treated==2 for observation in which there was more than one treatment in the time period. We are looking at just the FIRST treatment, so will replace those with 0 
highcourt_inc_nom_cem_subset$treated[which(highcourt_inc_nom_cem_subset$treated==2)]<-0


#generating bin cutpoints
yearcut<-c(1969.1, 1970.1, 1971.1, 1972.1, 1973.1, 1974.1, 1975.1, 1976.1, 1977.1, 1978.1, 1979.1, 1980.1, 1981.1, 1982.1, 1983.1, 1984.1, 1985.1, 1986.1, 1987.1, 1988.1, 1989.1, 1990.1, 1991.1, 1992.1, 1993.1, 1994.1, 1995.1, 1996.1, 1997.1, 1998.1, 1999.1, 2000.1, 2001.1, 2002.1, 2003.1, 2004.1, 2005.1, 2006.1, 2007.1, 2008.1, 2009.1, 2010.1)
nominatorscut<- c(-.1, .9, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1, 9.1) 
selectorscut<- c(-.1, .9, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1)

#coarsened exact matching
cem.1 <- matchit(treated_inc_nom ~ year + nominators_to_match_inc_nom + selectors_to_match_inc_nom, 
                 data = highcourt_inc_nom_cem_subset, method = "cem", 
                 cutpoints = list(year=yearcut, nominators_to_match_inc_nom=nominatorscut, selectors_to_match_inc_nom=selectorscut))


#treated units matchd.
cem.1$nn #18 of 19 matched units


#the cem.1 data is only the years and countries that matched. 
#saving as a data set
cem.data2017_inc_nom <- match.data(cem.1)

#table(cem.data2017_inc_nom$subclass, cem.data2017_inc_nom$treated_inc_nom)

#merging other matching variables for the second stage back in with the CEM output
cem.data2017_inc_nom<-merge(cem.data2017_inc_nom, highcourt_inc_nom, by= c("record_id", "nominators_to_match_inc_nom","selectors_to_match_inc_nom", "year", "treated_inc_nom", "drop_from_match_inc_nom", "state"), all.x=TRUE) 

#optmatch matching#
#propensity score matching with optmatch
psm<-glm(treated_inc_nom~ percent_women_filled 
         +since_UNIVSFFRG   
         +v2jupack_filled
         +v2x_partipdem_filled
         +v2x_egaldem_filled
         + v2xcl_rol_filled
         +v2x_gender_filled
         +v2csgender_filled
         +v2mefemjrn_filled
         +v2cldmovew_filled
         +v2cldiscw_filled
         +v2clslavef_filled
         +v2clprptyw_filled
         +v2clacjstw_filled
         +v2pepwrgen_filled
         +strata(subclass), family="binomial", data=cem.data2017_inc_nom)

ps.pm <- pairmatch(psm, data = cem.data2017_inc_nom)
#summary(ps.pm)
#print(ps.pm, grouped=TRUE) ##hey it worked. all 26 treated units matched. Checked the first two and they matched within strata too!


#pulling matching indicator from optmatch into the dataframe
all.equal(names(ps.pm), row.names(cem.data2017_inc_nom)) #TRUE
matched_pairs_inc_nom<-cbind(cem.data2017_inc_nom, matches=ps.pm)
matched_parts_inc_nom_full<-matched_pairs_inc_nom

#QualCI

matched_pairs_inc_nom$matched <-as.numeric(matched_pairs_inc_nom$matches)
matched_pairs_inc_nom$matched[is.na(matched_pairs_inc_nom$matched)]<-0

just_matched_inc_nom<-matched_pairs_inc_nom[which(matched_pairs_inc_nom$matched!= 0),]
just_matched_inc_nom<-just_matched_inc_nom[c("record_id", "year","treated_inc_nom", "matched", "year_first_wom")]


#replacing year of first woman as 2017 if they have not yet had a woman yet  for calculating time to first woman.
just_matched_inc_nom$year_first_wom[which(just_matched_inc_nom$year_first_wom==9999)]<-2017.0

#reshape wide
wide_matched_inc_nom <- reshape(just_matched_inc_nom, 
                                timevar = "treated_inc_nom",
                                idvar = c("matched", "year"),
                                direction = "wide")



#generating years to woman variable
wide_matched_inc_nom$time_to_wom.1 <- wide_matched_inc_nom$year_first_wom.1 - wide_matched_inc_nom$year
wide_matched_inc_nom$time_to_wom.0<-wide_matched_inc_nom$year_first_wom.0-wide_matched_inc_nom$year
wide_matched_inc_nom$diff_treated_minus_control<-wide_matched_inc_nom$time_to_wom.1-wide_matched_inc_nom$time_to_wom.0


#long format##
long_matched_inc_nom<-reshape(wide_matched_inc_nom,
                              varying=c("record_id.1", 
                                        "record_id.0", 
                                        "year_first_wom.1",
                                        "year_first_wom.0",
                                        "time_to_wom.1", 
                                        "time_to_wom.0"),
                              idvar = c("matched", "year"),
                              direction="long")

long_matched_inc_nom<-rename(long_matched_inc_nom, treated_inc_nom=time)

### Drop out the NAs
long_matched_inc_nom<-na.omit(long_matched_inc_nom)#52 to 32
### Create a new column called "winner"
long_matched_inc_nom$winner<-"NA"
for (i in 1:nrow(long_matched_inc_nom))
{
  for (j in 1:nrow(long_matched_inc_nom))
  {
    if(j!=i)
    {
      if(long_matched_inc_nom$matched[i]==long_matched_inc_nom$matched[j])
      {
        if(long_matched_inc_nom$year_first_wom[i]<long_matched_inc_nom$year_first_wom[j])
        {
          long_matched_inc_nom$winner[i]<-1
        }
        else if(long_matched_inc_nom$year_first_wom[i]>long_matched_inc_nom$year_first_wom[j])
        {
          long_matched_inc_nom$winner[i]<-0
        }
        else if(long_matched_inc_nom$year_first_wom[i]==long_matched_inc_nom$year_first_wom[j])
        {
          long_matched_inc_nom$winner[i]<-"NA"
        }
        
      }
    }
  }
}

### Create a value called absolute value
long_matched_inc_nom$abs_val<-abs(long_matched_inc_nom$diff_treated_minus_control)

###Generate a column called between set rank
###Spilit into winner loser column
winner_inc_nom<-subset(long_matched_inc_nom,long_matched_inc_nom$winner==1)
loser_inc_nom<-subset(long_matched_inc_nom,long_matched_inc_nom$winner==0)
###Sort the subsets by "time_to_wom"
winner_inc_nom<-winner_inc_nom[rev(order(winner_inc_nom$time_to_wom)),]
loser_inc_nom<-loser_inc_nom[rev(order(loser_inc_nom$time_to_wom)),]
###Create the column called between set rank
winner_inc_nom$between_set_rank<-rank(winner_inc_nom$abs_val,ties.method="first")
loser_inc_nom$between_set_rank<-rank(loser_inc_nom$abs_val,ties.method="first")

###Merge the subsets again
all_inc_nom<-rbind(winner_inc_nom,loser_inc_nom)

###Create a column called pair and it is identical to between_set_rank
all_inc_nom$pair<-all_inc_nom$between_set_rank

###Sort the final dataset by pair
all_inc_nom_sorted<-all_inc_nom[order(all_inc_nom$pair),]
##need "winner" variable to be numeric
all_inc_nom_sorted$winner<-as.numeric(all_inc_nom_sorted$winner)

all_inc_nom_sorted<-all_inc_nom_sorted[, c("record_id", "treated_inc_nom", "winner", "between_set_rank","pair")]

table(all_inc_nom_sorted$pair)#10 pairs

between_rank_vector1<-c(1:10)


long_rank_quade_inc_nom<-quade.prep(data=all_inc_nom_sorted, 
                                    set="pair",
                                    treatment="treated_inc_nom",
                                    withinRank = "winner",
                                    unit="record_id",
                                    betweenRank = between_rank_vector1)

qout<-quade(long_rank_quade_inc_nom)
qout
####Plot####
#pdf("APPENDIX_Increase_Nominators_10pair_January2020.pdf", height=5.15, width=5 ) 
par(mfrow=c(1,1), mar=c(5, 5, 5, 5), family="serif")
hist(qout$Q, main="Permuatation Distribution", xlab="Signed Rank Statistic", col=NULL)
segments(qout$Qobs, 0, qout$Qobs, y1=7000000, lty = 2, lwd = 2, col = "red")
text(50,37, "p=.042")
mtext("Tr= Increase in Nominators, Exact Matched")
#dev.off()


####Appendix: increase in the number of selectors####
#increase in the number of selectors,  two stage matched, exact matched on pre change institutions

highcourts_inc_sec<-highcourts[c("state", "year", "record_id", "treated_inc_sec", 
                                 "since_UNIVSFFRG", "v2x_partipdem_filled", "v2x_egaldem_filled", "v2xcl_rol_filled",	"v2x_gender_filled",	
                                 "v2jupack_filled","v2csgender_filled" ,	"v2mefemjrn_filled",	"v2cldmovew_filled",	"v2cldiscw_filled",	"v2clslavef_filled",
                                 "v2clprptyw_filled",	"v2clacjstw_filled",	"v2pepwrgen_filled" , "drop_from_match_inc_sec", "percent_women_filled", "nominators_to_match_inc_sec","selectors_to_match_inc_sec")]

#merge
highcourts_inc_sec<-merge(highcourts_inc_sec, DV_data, by=c("state", "year"), all.x=TRUE)
#table(highcourts_inc_sec$has_there_ever_been_wom)

highcourts_inc_sec<-subset(highcourts_inc_sec, has_there_ever_been_wom==0 | is.na(has_there_ever_been_wom))
#table(highcourts_inc_sec$treated_inc_sec) 

##need to drop observations with state id of 80 and 81 -- german democratic republic and german federal republic. Booh had women by the 50s but merged in as germnay (state==82)
highcourts_inc_sec<-subset(highcourts_inc_sec, state!=80 & state!=81)

#dropping countries that shouldnt be mathces
highcourts_inc_sec<-highcourts_inc_sec[which(highcourts_inc_sec$drop_from_match_inc_sec!=1),] #4076 to 3049

#Matching in two stages: first exact matching on year and pre-treatment institutions. 
#Then, we will use full match in the optmatch package to require matching within strata 

#highcourtsclean name of full data set, going to subset to just the variables we need to exact match b/c cem does NOT like NAs
highcourts_inc_sec_cem_subset<-highcourts_inc_sec[, c("record_id", "nominators_to_match_inc_sec", "selectors_to_match_inc_sec", "year", "treated_inc_sec", "drop_from_match_inc_sec", "state")]
highcourts_inc_sec_cem_subset<-na.omit(highcourts_inc_sec_cem_subset)

##treatment variable needs to be 0,1. Right now I have treated==2 for observation in which there was more than one treatment in the time period. We are looking at just the FIRST treatment, so will replace those with 0 
highcourts_inc_sec_cem_subset$treated[which(highcourts_inc_sec_cem_subset$treated==2)]<-0

#generating bin cutpoints
yearcut<-c(1969.1, 1970.1, 1971.1, 1972.1, 1973.1, 1974.1, 1975.1, 1976.1, 1977.1, 1978.1, 1979.1, 1980.1, 1981.1, 1982.1, 1983.1, 1984.1, 1985.1, 1986.1, 1987.1, 1988.1, 1989.1, 1990.1, 1991.1, 1992.1, 1993.1, 1994.1, 1995.1, 1996.1, 1997.1, 1998.1, 1999.1, 2000.1, 2001.1, 2002.1, 2003.1, 2004.1, 2005.1, 2006.1, 2007.1, 2008.1, 2009.1, 2010.1)
nominatorscut<- c(-.1, .9, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1, 9.1) 
secectorscut<- c(-.1, .9, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1)

#coarsened exact matching
cem.1 <- matchit(treated_inc_sec ~ year + nominators_to_match_inc_sec + selectors_to_match_inc_sec, 
                 data = highcourts_inc_sec_cem_subset, method = "cem", 
                 cutpoints = list(year=yearcut, nominators_to_match=nominatorscut, secectors_to_match=secectorscut))

#treated units matchd.
cem.1$nn #15 of 17 matched units

#the cem.1 data is only the years and countries that matched. 
#saving as a data set
cem.data2017_inc_sec <- match.data(cem.1)

#table(cem.data2017_inc_sec$subclass, cem.data2017_inc_sec$treated_inc_sec)

#merging other matching variables for the second stage back in with the CEM output
cem.data2017_inc_sec<-merge(cem.data2017_inc_sec, highcourts_inc_sec, by= c("record_id", "nominators_to_match_inc_sec","selectors_to_match_inc_sec", "year", "treated_inc_sec", "drop_from_match_inc_sec", "state"), all.x=TRUE) # n=808

#optmatch matching#
#propensity score matching with optmatch
psm<-glm(treated_inc_sec~ percent_women_filled 
         +since_UNIVSFFRG   
         +v2jupack_filled
         +v2x_partipdem_filled
         +v2x_egaldem_filled
         + v2xcl_rol_filled
         +v2x_gender_filled
         +v2csgender_filled
         +v2mefemjrn_filled
         +v2cldmovew_filled
         +v2cldiscw_filled
         +v2clslavef_filled
         +v2clprptyw_filled
         +v2clacjstw_filled
         +v2pepwrgen_filled
         +strata(subclass), family="binomial", data=cem.data2017_inc_sec)

ps.pm <- pairmatch(psm, data = cem.data2017_inc_sec)
#summary(ps.pm)
#print(ps.pm, grouped=TRUE) ## 

#pulling matching indicator from optmatch into the dataframe
all.equal(names(ps.pm), row.names(cem.data2017_inc_sec)) #TRUE
matched_pairs_inc_sec<-cbind(cem.data2017_inc_sec, matches=ps.pm)
matched_parts_inc_sec_full<-matched_pairs_inc_sec

#NAs
matched_pairs_inc_sec$matched <-as.numeric(matched_pairs_inc_sec$matches)
matched_pairs_inc_sec$matched[is.na(matched_pairs_inc_sec$matched)]<-0

just_matched_inc_sec<-matched_pairs_inc_sec[which(matched_pairs_inc_sec$matched!= 0),]
just_matched_inc_sec<-just_matched_inc_sec[, c("record_id", "year","treated_inc_sec", "matched", "year_first_wom")]

#replacing year of first woman as 2017 if they have not yet had a woman yet  for calculating time to first woman.
just_matched_inc_sec$year_first_wom[which(just_matched_inc_sec$year_first_wom==9999)]<-2017.0

#reshape wide
wide_matched_inc_sec <- reshape(just_matched_inc_sec, 
                                timevar = "treated_inc_sec",
                                idvar = c("matched", "year"),
                                direction = "wide")

#generating years to woman variable
wide_matched_inc_sec$time_to_wom.1 <- wide_matched_inc_sec$year_first_wom.1 - wide_matched_inc_sec$year
wide_matched_inc_sec$time_to_wom.0<-wide_matched_inc_sec$year_first_wom.0-wide_matched_inc_sec$year
wide_matched_inc_sec$diff_treated_minus_control<-wide_matched_inc_sec$time_to_wom.1-wide_matched_inc_sec$time_to_wom.0

#long format#
long_matched_inc_sec<-reshape(wide_matched_inc_sec,
                              varying=c("record_id.1", 
                                        "record_id.0", 
                                        "year_first_wom.1",
                                        "year_first_wom.0",
                                        "time_to_wom.1", 
                                        "time_to_wom.0"),
                              idvar = c("matched", "year"),
                              direction="long")

long_matched_inc_sec<-rename(long_matched_inc_sec, treated_inc_sec=time)

### Drop out the NAs
long_matched_inc_sec<-na.omit(long_matched_inc_sec)
### Create a new column called "winner"
long_matched_inc_sec$winner<-"NA"
for (i in 1:nrow(long_matched_inc_sec))
{
  for (j in 1:nrow(long_matched_inc_sec))
  {
    if(j!=i)
    {
      if(long_matched_inc_sec$matched[i]==long_matched_inc_sec$matched[j])
      {
        if(long_matched_inc_sec$year_first_wom[i]<long_matched_inc_sec$year_first_wom[j])
        {
          long_matched_inc_sec$winner[i]<-1
        }
        else if(long_matched_inc_sec$year_first_wom[i]>long_matched_inc_sec$year_first_wom[j])
        {
          long_matched_inc_sec$winner[i]<-0
        }
        else if(long_matched_inc_sec$year_first_wom[i]==long_matched_inc_sec$year_first_wom[j])
        {
          long_matched_inc_sec$winner[i]<-"NA"
        }
        
      }
    }
  }
}

### Create a value called absolute value
long_matched_inc_sec$abs_val<-abs(long_matched_inc_sec$diff_treated_minus_control)

###Generate a column called between set rank
###Spilit into winner loser column
winner_inc_sec<-subset(long_matched_inc_sec,long_matched_inc_sec$winner==1)
loser_inc_sec<-subset(long_matched_inc_sec,long_matched_inc_sec$winner==0)
###Sort the subsets by "time_to_wom"
winner_inc_sec<-winner_inc_sec[rev(order(winner_inc_sec$time_to_wom)),]
loser_inc_sec<-loser_inc_sec[rev(order(loser_inc_sec$time_to_wom)),]
###Create the column called between set rank
winner_inc_sec$between_set_rank<-rank(winner_inc_sec$abs_val,ties.method="first")
loser_inc_sec$between_set_rank<-rank(loser_inc_sec$abs_val,ties.method="first")

###Merge the subsets again
all_inc_sec<-rbind(winner_inc_sec,loser_inc_sec)

###Create a column called pair and it is identical to between_set_rank
all_inc_sec$pair<-all_inc_sec$between_set_rank

###Sort the final dataset by pair
all_inc_sec_sorted<-all_inc_sec[order(all_inc_sec$pair),]
##need "winner" variable to be numeric
all_inc_sec_sorted$winner<-as.numeric(all_inc_sec_sorted$winner)


####QualCI

all_inc_sec_sorted<-all_inc_sec_sorted[, c("record_id", "treated_inc_sec", "winner", "between_set_rank","pair")]
table(all_inc_sec_sorted$pair) #9 pairs
between_rank_vector1<-c(1:9)
long_rank_quade_inc_sec<-quade.prep(data=all_inc_sec_sorted, 
                                    set="pair",
                                    treatment="treated_inc_sec",
                                    withinRank = "winner",
                                    unit="record_id",
                                    betweenRank = between_rank_vector1)

qout<-quade(long_rank_quade_inc_sec)
qout

#### plot####
#pdf("APPENDIX_Increase_secectors_9pair_June2020.pdf", height=5.15, width=5 ) 
par(mfrow=c(1,1), mar=c(5, 5, 5, 5), family="serif")
hist(qout$Q, main="Permuatation Distribution", xlab="Signed Rank Statistic", col=NULL)
segments(qout$Qobs, 0, qout$Qobs, y1=7000000, lty = 2, lwd = 2, col = "red")
text(35,65, "p=.41")
#text(15, 6000, "Full Sample")
#text(15, 5600, "15 Pairs")
mtext("Tr= Increase in Selcectors, Exact Matched")
#dev.off()

####Comparing matching variables for matched and unmatched countries, generally####
m1<-just_matched_tr1_full[c("record_id", "state","treated1", "year_first_wom")]
m1$treatment<-ifelse(m1$treated1==1, "t1", "c1")
m1$treated1<-NULL
#units with out data for DV drop out of our analysis, so dropping them here
m1$year_first_wom[is.na(m1$year_first_wom)] <- -999
m1<-m1[which(m1$year_first_wom>0),]



m2<-just_matched_tr2[c("record_id", "state", "treated2", "year_first_wom")]
m2$treatment<-ifelse(m2$treated2==1, "t2", "c2")
m2$treated2<-NULL
m2$year_first_wom[is.na(m2$year_first_wom)] <- -999
m2<-m2[which(m2$year_first_wom>0),]


m3<-just_matched_tr3[c("record_id", "state","treated3", "year_first_wom")]
m3$treatment<-ifelse(m3$treated3==1, "t3", "c3")
m3$treated3<-NULL
m3$year_first_wom[is.na(m3$year_first_wom)] <- -999
m3<-m3[which(m3$year_first_wom>0),]


all_matches<-rbind(m1, m2)
all_matches<-rbind(all_matches, m3)
all_matches$duplicate<-as.numeric(duplicated(all_matches$record_id))

table(all_matches$duplicate)
#View(all_matches)
all_matches<-all_matches[which(all_matches$duplicate==0),]

#all_matches are all the countries that are in our study as either a treatment or control for treatment 1, 2, and 3

#Now want to comparing the full sample to the matches
all_matches<-all_matches[c("record_id","treatment")]


compare<-merge(highcourts_full, all_matches, by="record_id", all.x=TRUE, all.y=TRUE)
compare$treatment[is.na(compare$treatment)] <- 0
table(compare$treatment)
compare$in_sample<-compare$treatment
compare$in_sample<-ifelse(compare$in_sample==0, "0", "1")
table(compare$in_sample)

compare<-compare[c("record_id", 
                   "percent_women_filled",
                   "since_UNIVSFFRG",   
                   "v2jupack_filled", 
                   "v2cldiscw_filled",
                   "v2clprptyw_filled",
                   "v2x_partipdem_filled",
                   "v2x_gender_filled",
                   "v2pepwrgen_filled",
                   "v2clacjstw_filled",
                   "v2clslavef_filled",
                   "v2cldmovew_filled",
                   "v2mefemjrn_filled",
                   "v2csgender_filled",
                   "v2x_egaldem_filled",
                   "v2xcl_rol_filled",
                   "v2meaccess",
                   "v2mecenefm",
                   "v2mecrit",
                   "e_GDP_Per_Cap_Haber_Men_2", 
                   "in_sample",
                   "oecd_indicator")]

##Table A4 -- the table is formatted differently but includes the following information: 
#in sample descriptive stats
stargazer(compare[which(compare$in_sample==1),], omit.summary.stat=c(  "p25","p75", "sd" ))
#out of sample stats
stargazer(compare[which(compare$in_sample==0),], omit.summary.stat=c( "p25","p75", "sd"))
#oecd stats
stargazer(compare[which(compare$oecd_indicator==1),], omit.summary.stat=c( "p25","p75", "sd"))
#global stats
stargazer(compare, omit.summary.stat=c( "p25","p75", "sd"))



####Package information####

####Session Info (July 20, 2020)
#R version 4.0.2 (2020-06-22)
#Platform: x86_64-apple-darwin17.0 (64-bit)
#Running under: macOS High Sierra 10.13.6

#Matrix products: default
#BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
#LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib

#locale:
#  [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

#attached base packages:
#  [1] tcltk     stats     graphics  grDevices utils     datasets  methods   base     

#other attached packages:
#  [1] plyr_1.8.6      qualCI_0.1      RItools_0.1-17  SparseM_1.78    optmatch_0.9-13 survival_3.1-12
#[7] MatchIt_3.0.2   cem_1.1.20      lattice_0.20-41 stargazer_5.2.2

#loaded via a namespace (and not attached):
#  [1] Rcpp_1.0.4.6        randomForest_4.6-14 digest_0.6.25       MASS_7.3-51.6      
#[5] grid_4.0.2          xtable_1.8-4        nlme_3.1-148        svd_0.5            
#[9] combinat_0.0-8      Matrix_1.2-18       splines_4.0.2       tools_4.0.2        
#[13] abind_1.4-5         compiler_4.0.2     

